/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 * Copyright 2015 RackTop Systems.
 * Copyright (c) 2016, Intel Corporation.
 */

/*
 * Pool import support functions.
 *
 * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since
 * these commands are expected to run in the global zone, we can assume
 * that the devices are all readable when called.
 *
 * To import a pool, we rely on reading the configuration information from the
 * ZFS label of each device.  If we successfully read the label, then we
 * organize the configuration information in the following hierarchy:
 *
 *	pool guid -> toplevel vdev guid -> label txg
 *
 * Duplicate entries matching this same tuple will be discarded.  Once we have
 * examined every device, we pick the best label txg config for each toplevel
 * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
 * update any paths that have changed.  Finally, we attempt to import the pool
 * using our derived config, and record the results.
 */

#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <libintl.h>
#include <libgen.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/dktp/fdisk.h>
#include <sys/vdev_impl.h>
#include <sys/vtoc.h>
#include <sys/fs/zfs.h>
#include <sys/efi_partition.h>

#include <thread_pool.h>
#include <libzutil.h>
#include <libnvpair.h>

#include "zutil_import.h"

#include <blkid/blkid.h>

static void
check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
    diskaddr_t size, uint_t blksz)
{
	rdsk_node_t tmpnode;
	rdsk_node_t *node;
	char sname[MAXNAMELEN];

	tmpnode.rn_name = &sname[0];
	(void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u",
	    diskname, partno);
	/*
	 * protect against division by zero for disk labels that
	 * contain a bogus sector size
	 */
	if (blksz == 0)
		blksz = DEV_BSIZE;
	/* too small to contain a zpool? */
	if ((size < (SPA_MINDEVSIZE / blksz)) &&
	    (node = avl_find(r, &tmpnode, NULL)))
		node->rn_nozpool = B_TRUE;
}

static void
nozpool_all_slices(avl_tree_t *r, const char *sname)
{
	char diskname[MAXNAMELEN];
	char *ptr;
	int i;

	(void) strncpy(diskname, sname, MAXNAMELEN);
	if (((ptr = strrchr(diskname, 's')) == NULL) &&
	    ((ptr = strrchr(diskname, 'p')) == NULL))
		return;
	ptr[0] = 's';
	ptr[1] = '\0';
	for (i = 0; i < NDKMAP; i++)
		check_one_slice(r, diskname, i, 0, 1);
	ptr[0] = 'p';
	for (i = 0; i <= FD_NUMPART; i++)
		check_one_slice(r, diskname, i, 0, 1);
}

static void
check_slices(avl_tree_t *r, int fd, const char *sname)
{
	struct extvtoc vtoc;
	struct dk_gpt *gpt;
	char diskname[MAXNAMELEN];
	char *ptr;
	int i;

	(void) strncpy(diskname, sname, MAXNAMELEN);
	if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1]))
		return;
	ptr[1] = '\0';

	if (read_extvtoc(fd, &vtoc) >= 0) {
		for (i = 0; i < NDKMAP; i++)
			check_one_slice(r, diskname, i,
			    vtoc.v_part[i].p_size, vtoc.v_sectorsz);
	} else if (efi_alloc_and_read(fd, &gpt) >= 0) {
		/*
		 * on x86 we'll still have leftover links that point
		 * to slices s[9-15], so use NDKMAP instead
		 */
		for (i = 0; i < NDKMAP; i++)
			check_one_slice(r, diskname, i,
			    gpt->efi_parts[i].p_size, gpt->efi_lbasize);
		/* nodes p[1-4] are never used with EFI labels */
		ptr[0] = 'p';
		for (i = 1; i <= FD_NUMPART; i++)
			check_one_slice(r, diskname, i, 0, 1);
		efi_free(gpt);
	}
}

int
zfs_dev_flush(int fd)
{
	return (ioctl(fd, DKIOCFLUSHWRITECACHE));
}

void
zpool_open_func(void *arg)
{
	rdsk_node_t *rn = arg;
	struct stat64 statbuf;
	nvlist_t *config;
	int error;
	int num_labels = 0;
	int fd;

	if (rn->rn_nozpool)
		return;
	if ((fd = openat64(rn->rn_dfd, rn->rn_name,
	    O_RDONLY | O_CLOEXEC)) < 0) {
		/* symlink to a device that's no longer there */
		if (errno == ENOENT)
			nozpool_all_slices(rn->rn_avl, rn->rn_name);
		return;
	}

	/*
	 * Ignore failed stats.  We only want regular
	 * files, character devs and block devs.
	 */
	if (fstat64(fd, &statbuf) != 0 ||
	    (!S_ISREG(statbuf.st_mode) &&
	    !S_ISCHR(statbuf.st_mode) &&
	    !S_ISBLK(statbuf.st_mode))) {
		(void) close(fd);
		return;
	}

	/* this file is too small to hold a zpool */
	if (S_ISREG(statbuf.st_mode) &&
	    statbuf.st_size < SPA_MINDEVSIZE) {
		(void) close(fd);
		return;
	} else if (!S_ISREG(statbuf.st_mode)) {
		/*
		 * Try to read the disk label first so we don't have to
		 * open a bunch of minor nodes that can't have a zpool.
		 */
		check_slices(rn->rn_avl, fd, rn->rn_name);
	}

	error = zpool_read_label(fd, &config, &num_labels);
	if (error != 0) {
		(void) close(fd);
		return;
	}

	if (num_labels == 0) {
		(void) close(fd);
		nvlist_free(config);
		return;
	}

	(void) close(fd);

	rn->rn_config = config;
	rn->rn_num_labels = num_labels;
}

/*
 * Parse string with disk list (disks)
 * "c1t0d0 c3t0d0 c4t0d0"
 * the result is placed to the output array list
 */
static void
parse_disks(char *disks, char ***list)
{
	char	*ptr, *save;
	char	**out = NULL;
	size_t	size = 0;

	for (save = NULL, ptr = disks; *ptr != '\0'; ptr++) {
		if (save == NULL) {
			if (!isspace((int)*ptr))
				save = ptr;
			continue;
		} else if (isspace(*ptr)) {
			char symbol = *ptr;
			*ptr = '\0';
			if (size == 0)
				size++;
			size++;
			out = realloc(out, sizeof(*out) * size);
			out[size - 2] = strdup(save);
			out[size - 1] = NULL;
			*ptr = symbol;
			save = NULL;
		}
	}
	if (save != NULL) {
		if (size == 0)
			size++;
		size++;
		out = realloc(out, sizeof(*out) * size);
		out[size - 2] = strdup(save);
		out[size - 1] = NULL;
		save = NULL;
	}
	*list = out;
}

static const char * const
zpool_default_import_path[] = {
	"/dev/dsk"		/* DilOS path */
};

const char * const *
zpool_default_search_paths(size_t *count)
{
	*count = ARRAY_SIZE(zpool_default_import_path);
	return (zpool_default_import_path);
}

/*
 * Tests the name starts from dlist[n]
 * returns 0 if the name starts from the dlist[n]
 * and 1 otherwise
 */
static int
match_disk(const char *name, char **dlist)
{
	size_t	i;
	int	ret = 1;

	for (i = 0; dlist[i] != NULL; i++)
		if (strstr(name, dlist[i]) == name) {
			ret = 0;
			break;
		}

	return (ret);
}

/*
 * Use libblkid to quickly enumerate all known zfs devices.
 */
int
zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
    avl_tree_t **slice_cache)
{
	static char disk_rootd[] = DISK_ROOT "/";
	static const char *rdisk_rootd = RDISK_ROOT "/";

	int			i;
	struct dirent64 	*dp;
	char			path[MAXPATHLEN], *end;
	size_t			pathleft;
	rdsk_node_t		*slice;
	char			*filter, *disks = NULL;
	char			**dlist = NULL;

	*slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t));
	avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
	    offsetof(rdsk_node_t, rn_node));

	filter = getenv("ZFS_USE_ONLY_DISKS");
	if (filter != NULL) {
		/* the variable defined, filter disks */
		disks = getenv("DISKS");
		if (disks != NULL) {
			/* parse the string */
			parse_disks(disks, &dlist);
		}
	}

	/*
	 * Go through and read the label configuration information from every
	 * possible device, organizing the information according to pool GUID
	 * and toplevel GUID.
	 */
	for (i = 0; i < ARRAY_SIZE(zpool_default_import_path); i++) {
		char rdsk[MAXPATHLEN];
		const char *const dir = zpool_default_import_path[i];
		int dfd;
		DIR *dirp;

		/* use realpath to normalize the path */
		if (realpath(dir, path) == 0)
			continue;
		end = &path[strlen(path)];
		*end++ = '/';
		*end = '\0';
		pathleft = &path[sizeof (path)] - end;

		/*
		 * Using raw devices instead of block devices when we're
		 * reading the labels skips a bunch of slow operations during
		 * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
		 */
		if (strcmp(path, disk_rootd) == 0)
			(void) strlcpy(rdsk, rdisk_rootd, sizeof (rdsk));
		else
			(void) strlcpy(rdsk, path, sizeof (rdsk));

		if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
		    (dirp = fdopendir(dfd)) == NULL) {
			if (dfd >= 0)
				(void) close(dfd);
			continue;
		}

		/*
		 * This is not MT-safe, but we have no MT consumers of libzfs
		 */
		while ((dp = readdir64(dirp)) != NULL) {
			const char *name = dp->d_name;
			if (name[0] == '.' &&
			    (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
				continue;
			if ((dlist != NULL) && (match_disk(name, dlist) != 0))
				continue;

			slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
			slice->rn_name = zutil_strdup(hdl, name);
			slice->rn_avl = *slice_cache;
			slice->rn_dfd = dup(dfd);
			slice->rn_hdl = hdl;
			slice->rn_nozpool = B_FALSE;
			strlcpy(end, slice->rn_name, pathleft);
			slice->rn_path = zutil_strdup(hdl, path);

			pthread_mutex_lock(lock);
			avl_add(*slice_cache, slice);
			pthread_mutex_unlock(lock);
		}
		(void) closedir(dirp);
	}

	if (dlist != NULL) {
		for (i = 0; dlist[i] != NULL; i++)
			free(dlist[i]);
		free(dlist);
	}

	return (0);
}

/*
 * Linux persistent device strings for vdev labels
 *
 * based on libudev for consistency with libudev disk add/remove events
 */

typedef struct vdev_dev_strs {
	char	vds_devid[128];
	char	vds_devphys[128];
} vdev_dev_strs_t;

/*
 * Wait up to timeout_ms for udev to set up the device node.  The device is
 * considered ready when libudev determines it has been initialized, all of
 * the device links have been verified to exist, and it has been allowed to
 * settle.  At this point the device the device can be accessed reliably.
 * Depending on the complexity of the udev rules this process could take
 * several seconds.
 */
int
zpool_label_disk_wait(const char *path, int timeout_ms)
{
	int settle_ms = 50;
	long sleep_ms = 10;
	hrtime_t start, settle;
	struct stat64 statbuf;

	start = gethrtime();
	settle = 0;

	do {
		errno = 0;
		if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
			if (settle == 0)
				settle = gethrtime();
			else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
				return (0);
		} else if (errno != ENOENT) {
			return (errno);
		}

		usleep(sleep_ms * MILLISEC);
	} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);

	return (ENODEV);
}

/*
 * Update a leaf vdev's persistent device strings
 *
 * - only applies for a dedicated leaf vdev (aka whole disk)
 * - updated during pool create|add|attach|import
 * - used for matching device matching during auto-{online,expand,replace}
 * - stored in a leaf disk config label (i.e. alongside 'path' NVP)
 * - these strings are currently not used in kernel (i.e. for vdev_disk_open)
 *
 * single device node example:
 * 	devid:		'scsi-MG03SCA300_350000494a8cb3d67-part1'
 * 	phys_path:	'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
 *
 * multipath device node example:
 * 	devid:		'dm-uuid-mpath-35000c5006304de3f'
 *
 * We also store the enclosure sysfs path for turning on enclosure LEDs
 * (if applicable):
 *	vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
 */
void
update_vdev_config_dev_strs(nvlist_t *nv)
{
	const char *env, *type, *path;
	uint64_t wholedisk = 0;

	/*
	 * For the benefit of legacy ZFS implementations, allow
	 * for opting out of devid strings in the vdev label.
	 *
	 * example use:
	 *	env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer
	 *
	 * explanation:
	 * Older OpenZFS implementations had issues when attempting to
	 * display pool config VDEV names if a "devid" NVP value is
	 * present in the pool's config.
	 *
	 * For example, a pool that originated on illumos platform would
	 * have a devid value in the config and "zpool status" would fail
	 * when listing the config.
	 *
	 * A pool can be stripped of any "devid" values on import or
	 * prevented from adding them on zpool create|add by setting
	 * ZFS_VDEV_DEVID_OPT_OUT.
	 */
	env = getenv("ZFS_VDEV_DEVID_OPT_OUT");
	if (env && (strtoul(env, NULL, 0) > 0 ||
	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) {
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
		return;
	}

	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 ||
	    strcmp(type, VDEV_TYPE_DISK) != 0) {
		return;
	}
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
		return;
	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);

	/* clear out any stale entries */
	(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
	(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
	(void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
}

void
update_vdevs_config_dev_sysfs_path(nvlist_t *config)
{
}
